#importing all necessary libraries
import pandas as pd
import scipy
import scipy.stats  # 'import scipy' alone does not guarantee the stats submodule is loaded
import requests
from bs4 import BeautifulSoup
import wget
import os
import glob
import warnings
warnings.filterwarnings('ignore')
import quandl
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import chart_studio.plotly as py
#######SKIP THIS CELL IF FILES ALREADY LOADED IN FILE SYSTEM######
def scrape_filenames(url):
    """Scrape the names of the 'details' files from the NOAA HTML listing page.

    Parameters
    ----------
    url : str
        URL of the NOAA StormEvents csvfiles directory listing.

    Returns
    -------
    list of str
        File names containing 'details', in page order (the page is sorted
        chronologically, which download_files relies on).
    """
    website = requests.get(url)
    soup = BeautifulSoup(website.content, features="html.parser")
    # The first table on the page lists every downloadable file.
    table = soup.find_all('table')[0]
    file_names = []
    for tr in table.find_all('tr'):
        # Select real cells (td/th) explicitly: iterating the row directly also
        # yields whitespace text nodes, which made cell [0] unreliable, and an
        # empty row would raise IndexError.
        cells = tr.find_all(['td', 'th'])
        if cells:
            file_names.append(cells[0].get_text())
    # Only the 'details' files are needed for this project.
    return list(filter(lambda name: "details" in name, file_names))
def download_files(file_names, website_url):
    """Download the compressed CSV file for each year of storm data.

    Parameters
    ----------
    file_names : list of str
        File names scraped from the NOAA directory listing, already sorted
        by year (the listing starts at 1950).
    website_url : str
        Base URL the file names are appended to.

    Each file is saved as ./data/<year>.csv.gz. Any previously downloaded
    copy is removed first so reruns refresh the data.
    """
    # The original code duplicated the download call in both branches of the
    # exists/doesn't-exist check; only the removal is conditional.
    for year, name in enumerate(file_names, start=1950):
        target = './data/{}.csv.gz'.format(year)
        if os.path.isfile(target):
            os.remove(target)
        wget.download(website_url + name, out=target)
def get_data(url):
    """Scrape the NOAA listing at *url* and download every 'details' file."""
    download_files(scrape_filenames(url), url)
# NOAA Storm Events directory listing; get_data() scrapes it and downloads
# every per-year 'details' archive into ./data/. Network-heavy — skip when the
# files are already present (see the SKIP note above).
url = r'https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/'
get_data(url)
#Loading the data into pandas DataFrames. This cell takes some time to run, so after this cell I copy the DataFrame
#so I don't have to keep running this cell if I need to reset the dataframe.
file_names = glob.glob("./data/*.csv.gz")  # all compressed csv files in the data folder
# Derive the year from each file's basename ("<year>.csv.gz") rather than by
# slicing the full path at fixed offsets, which breaks if the path prefix or
# separator ever changes (e.g. on Windows).
years = [os.path.basename(name)[:4] for name in file_names]
# Read each year, tag it with its year, and concatenate ONCE at the end:
# concatenating inside the loop re-copies the accumulated frame every
# iteration (quadratic in total rows).
frames = []
for name, year in zip(file_names, years):
    df = pd.read_csv(name, compression="gzip")
    df['Year'] = year
    frames.append(df)
full_df = pd.concat(frames) if frames else pd.DataFrame()
full_df.head()
#A look at all of the columns
# Work on a copy so the slow load above doesn't need rerunning whenever the
# working frame has to be reset.
master_df = full_df.copy()
print(master_df.columns)
#reading in the csv from the data folder. The data starts on the 57th row
co2_data = pd.read_csv('data/monthly_flask_co2_ljo.csv', skiprows=56)
# the first two rows under the header are labels/units, not data
co2_data = co2_data.iloc[2:]
co2_data = co2_data.astype('float')
co2_data[' Yr'] = co2_data[' Yr'].astype('int64')
# Rename the 10 columns positionally. Only YEAR and 'CO2 (PPM)' (the filled,
# seasonally-adjusted series) are used later; the unused columns get unique
# placeholder names — the original list assigned the label ' seasonally' to
# two different columns, creating duplicate column labels that make later
# label-based selection ambiguous.
co2_data.columns = ['YEAR', 'MONTH', 'DATE_EXCEL', 'DATE_FRACTION', 'CO2_RAW',
                    'CO2_SEAS_ADJ', 'CO2_FIT', 'CO2_FIT_SEAS_ADJ',
                    'CO2 (PPM)', 'CO2_FILLED_SEAS_ADJ']
# Keep 1995 onward to match the weather data, then average CO2 by year.
co2_data = co2_data[co2_data['YEAR'] >= 1995]
co2_by_year = co2_data.groupby('YEAR', as_index=False).mean()
co2_by_year = co2_by_year[['YEAR', 'CO2 (PPM)']]
#quickly analyze the data to see the trend
co2_by_year.plot(x='YEAR', y='CO2 (PPM)')
# Inspect the dataset dimensions (notebook-style bare expression).
master_df.shape
# BEGIN_YEARMONTH is a YYYYMM integer; cast it to string so the year and
# month can be sliced out positionally.
ym = master_df['BEGIN_YEARMONTH'].astype(str)
master_df['BEGIN_YEARMONTH'] = ym
master_df['BEGIN_YEAR'] = pd.to_numeric(ym.str[:4])
master_df['BEGIN_MONTH'] = pd.to_numeric(ym.str[4:])
master_df.head()
# Keep 1995 onward: most event types weren't recorded until after that.
master_df = master_df.loc[master_df['BEGIN_YEAR'] >= 1995]
master_df.shape
#converting the units in the DAMAGES column from strings to numeric values with the correct units.
def get_units(string):
    """Return the dollar multiplier encoded by a damage string's suffix.

    NOAA damage figures end in a unit letter: 'K' = thousand, 'M' = million,
    'B' = billion. Any string without a recognized suffix — including the
    literal 'nan' produced by casting missing values to str, and the empty
    string (which previously raised IndexError on string[-1]) — maps to 0 so
    it drops out of the damage totals.
    """
    if not string:
        return 0
    return {"K": 1000, "M": 1000000, "B": 1000000000}.get(string[-1], 0)
# DAMAGE_PROPERTY holds strings like '5K'/'2.5M'; cast to str first so that
# missing values become the literal 'nan' (which get_units maps to 0).
master_df['DAMAGE_PROPERTY'] = master_df['DAMAGE_PROPERTY'].astype(str)
#creating a column with the units based on the last letter in the property column
master_df['PROPERTY_UNITS'] = master_df['DAMAGE_PROPERTY'].apply(get_units)
#removes the letters from the DAMAGE_PROPERTY column
def remove_letters(string):
    """Strip a trailing unit letter (K/M/B) from a damage string, if present.

    Returns the string unchanged when there is no recognized suffix. Guards
    against the empty string, which previously raised IndexError on
    string[-1].
    """
    if string and string[-1] in ("K", "M", "B"):
        return string[:-1]
    return string
# Strip the unit letters from DAMAGE_PROPERTY and multiply by the units
# column computed above to get absolute dollar amounts.
cleaned_property = master_df['DAMAGE_PROPERTY'].astype(str).apply(remove_letters)
master_df['DAMAGE_PROPERTY'] = (pd.to_numeric(cleaned_property, errors='coerce')
                                * pd.to_numeric(master_df['PROPERTY_UNITS'], errors='coerce'))
# Run the same units-extraction/conversion pipeline for DAMAGE_CROPS.
master_df['DAMAGE_CROPS'] = master_df['DAMAGE_CROPS'].astype(str)
master_df['CROPS_UNITS'] = master_df['DAMAGE_CROPS'].apply(get_units)
cleaned_crops = master_df['DAMAGE_CROPS'].astype(str).apply(remove_letters)
master_df['DAMAGE_CROPS'] = (pd.to_numeric(cleaned_crops, errors='coerce')
                             * pd.to_numeric(master_df['CROPS_UNITS'], errors='coerce'))
#I will be fetching the CPI using a python API called quandl. To use this API I had register and create a free API key.
#I changed my API key after publishing this project, so running this cell with this API key WILL RETURN AN ERROR.
# Monthly US Consumer Price Index series; requires a valid Quandl key (the
# one below has been revoked — see the note above).
cpi_data = quandl.get("RATEINF/CPI_USA", api_key='rEu8n6m9G_ytM94NFpib')
cpi_df = pd.DataFrame(cpi_data)
# Move the DatetimeIndex into a regular 'Date' column and stringify it so it
# can be sliced into year/month below.
cpi_df = cpi_df.reset_index()
cpi_df['Date'] = cpi_df['Date'].astype(str)
cpi_df.head()
# Break the ISO-formatted 'Date' string into integer YEAR and MONTH columns
# so the CPI table can be merged with the weather data.
cpi_df['YEAR'] = cpi_df['Date'].str.slice(0, 4).astype('int64')
cpi_df['MONTH'] = cpi_df['Date'].str.slice(5, 7).astype('int64')
cpi_df.columns = ['Date', 'CPI', 'YEAR', 'MONTH']
# Attach the CPI for the month each weather event began.
master_df = master_df.merge(cpi_df, left_on=['BEGIN_YEAR', 'BEGIN_MONTH'],
                            right_on=['YEAR', 'MONTH'])
master_df.head()
# 2016 is the inflation base year: it is the most recent year with an
# available Value of a Statistical Life (VSL) estimate.
base_cli = master_df.loc[master_df['BEGIN_YEAR'] == 2016, 'CPI'].mean()
master_df['BASE_CLI'] = base_cli
# Multiplier that converts each row's nominal dollars into 2016 dollars.
master_df['INFLATION_MULTIPLIER'] = ((master_df['BASE_CLI'] - master_df['CPI']) / master_df['CPI']) + 1
# Inflation-adjust both damage columns.
for damage_col in ('DAMAGE_CROPS', 'DAMAGE_PROPERTY'):
    master_df['ADJ_' + damage_col] = master_df[damage_col] * master_df['INFLATION_MULTIPLIER']
# Monetize deaths using the VSL (USD 9.6M per life, 2016 estimate).
VSL = 9600000
master_df['DAMAGE_VSL'] = VSL * (master_df['DEATHS_DIRECT'] + master_df['DEATHS_INDIRECT'])
# Total damages = monetized deaths + adjusted crop + adjusted property damage.
master_df['TOTAL_MONETARY_DAMAGES'] = (master_df['DAMAGE_VSL']
                                       + master_df['ADJ_DAMAGE_CROPS']
                                       + master_df['ADJ_DAMAGE_PROPERTY'])
master_df['TOTAL_MONETARY_DAMAGES'].describe()
# The essential columns now exist; drop everything else.
master_df.columns
keep_cols = ['BEGIN_YEAR', 'BEGIN_MONTH', 'BEGIN_DAY', 'BEGIN_TIME', 'END_DAY', 'END_TIME',
             'STATE', 'STATE_FIPS', 'MONTH_NAME', 'EVENT_TYPE', 'BEGIN_DATE_TIME',
             'END_DATE_TIME', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
             'DEATHS_INDIRECT', 'ADJ_DAMAGE_PROPERTY', 'ADJ_DAMAGE_CROPS',
             'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
             'EPISODE_NARRATIVE', 'EVENT_NARRATIVE',
             'DAMAGE_VSL', 'TOTAL_MONETARY_DAMAGES']
master_df = master_df[keep_cols]
# Preview the 10 most damaging individual rows according to our scale.
preview_cols = ['BEGIN_YEAR', 'BEGIN_MONTH', 'BEGIN_DAY', 'STATE', 'EVENT_TYPE',
                'EPISODE_NARRATIVE', 'EVENT_NARRATIVE']
master_df.sort_values(by='TOTAL_MONETARY_DAMAGES', ascending=False).head(10)[preview_cols]
# Rows stemming from the same weather event are combined by grouping on
# (date, event type, state): impact columns are summed, time/location are
# averaged, and the first narrative in each group is kept.
group_keys = ['BEGIN_YEAR', 'BEGIN_MONTH', 'BEGIN_DAY', 'EVENT_TYPE', 'STATE',
              'STATE_FIPS', 'MONTH_NAME']
agg_spec = {'BEGIN_TIME': 'mean',
            'INJURIES_DIRECT': 'sum',
            'INJURIES_INDIRECT': 'sum',
            'DEATHS_DIRECT': 'sum',
            'DEATHS_INDIRECT': 'sum',
            'ADJ_DAMAGE_PROPERTY': 'sum',
            'ADJ_DAMAGE_CROPS': 'sum',
            'DAMAGE_VSL': 'sum',
            'TOTAL_MONETARY_DAMAGES': 'sum',
            'EPISODE_NARRATIVE': 'first',
            'EVENT_NARRATIVE': 'first',
            'BEGIN_LAT': 'mean',
            'BEGIN_LON': 'mean'}
grouped_df = master_df.groupby(group_keys, as_index=False).agg(agg_spec)
grouped_df.shape
# The 10 most damaging weather events after grouping.
view_cols = ['BEGIN_YEAR', 'BEGIN_MONTH', 'STATE', 'EVENT_TYPE',
             'EPISODE_NARRATIVE', 'EVENT_NARRATIVE']
grouped_df.sort_values(by='TOTAL_MONETARY_DAMAGES', ascending=False).head(10)[view_cols]
# This function groups similat weather events into one of 7 classes.
def classify_weather(data):
    """Map a raw NOAA EVENT_TYPE string to one of 7 broad categories.

    Parameters
    ----------
    data : str
        The EVENT_TYPE value for one record.

    Returns
    -------
    str
        One of the 7 category labels, or "Other" when the type is not listed.

    Uses sets for O(1) membership tests instead of the original list scans,
    and drops the duplicate 'Frost/Freeze' entry the original carried.
    """
    categories = [
        ("Winter Weather Event",
         {"Hail", 'Heavy Snow', 'Winter Weather', 'Winter Storm', 'Frost/Freeze',
          'Blizzard', 'Ice Storm', 'Cold/Wind Chill', 'Extreme Cold/Wind Chill',
          'Lake-Effect Snow', 'Avalanche', 'Marine Hail', 'Sleet', 'Freezing Fog',
          'High Snow', 'HAIL/ICY ROADS'}),
        ("Thunderstorms and Lightning",
         {'Thunderstorm Wind', 'High Wind', 'Lightning', 'Marine Thunderstorm Wind',
          'THUNDERSTORM WINDS FUNNEL CLOU', 'THUNDERSTORM WINDS LIGHTNING',
          'Marine Lightning', 'THUNDERSTORM WINDS/HEAVY RAIN', 'Heavy Rain',
          'THUNDERSTORM WINDS/ FLOOD', 'THUNDERSTORM WINDS HEAVY RAIN',
          'THUNDERSTORM WIND/ TREE', 'THUNDERSTORM WINDS/FLOODING',
          'THUNDERSTORM WIND/ TREES'}),
        ('Tropical Storm or Hurricane',
         {'Marine Hurricane/Typhoon', 'Tropical Storm', 'Tropical Depression',
          'Hurricane', 'Hurricane (Typhoon)', 'Marine Tropical Depression'}),
        ("Drought, Heat, and Fire",
         {'Wildfire', 'Heat', 'Drought', 'Dense Smoke', 'Excessive Heat'}),
        ("Flood",
         {'Flash Flood', 'Flood', 'Coastal Flood', 'Debris Flow', 'Tsunami',
          'Lakeshore Flood', 'THUNDERSTORM WINDS/FLASH FLOOD', 'HAIL FLOODING'}),
        ("Tornado",
         {'Tornado', 'TORNADOES, TSTM WIND, HAIL', 'TORNADO/WATERSPOUT', 'Waterspout'}),
    ]
    for label, events in categories:
        if data in events:
            return label
    return "Other"
# Assign each event its broad category using the classifier above.
grouped_df['EVENT_CATEGORY'] = grouped_df['EVENT_TYPE'].apply(classify_weather)
grouped_df.head()
# How the categories break down by frequency of occurrence and by total
# damages caused.
by_category = grouped_df.groupby('EVENT_CATEGORY')
event_counts = by_category.count()
event_damages = by_category.sum()
# Two stacked pie charts: event frequency on top, total damages below.
fig = make_subplots(
    rows=2, cols=1,
    specs=[[{"type": "domain"}], [{"type": "domain"}]],
    subplot_titles=("Number of Occurrences of Each Type of Weather Event",
                    "Total Damage Caused by Each Event Type")
)
# Row 1 slices by count of events; row 2 by total monetary damages. Both use
# the same category labels from the counts index.
pie_values = [event_counts['BEGIN_YEAR'], event_damages['TOTAL_MONETARY_DAMAGES']]
for row, values in enumerate(pie_values, start=1):
    fig.add_trace(
        go.Pie(labels=event_counts.index, values=values, textinfo='label+percent'),
        row=row, col=1
    )
fig.update_layout(height=1000)
fig.show()
# Interactive line chart of yearly economic damages with a dropdown to choose
# the category (8 traces total: 'All EVENTS' plus the 7 categories below).
fig = go.Figure()
unique_events = ['Tornado', 'Winter Weather Event', 'Thunderstorms and Lightning',
                 'Flood', 'Drought, Heat, and Fire',
                 'Tropical Storm or Hurricane', 'Other']
#aggregates the economic damages based on year
event_years = grouped_df.groupby('BEGIN_YEAR').agg({'TOTAL_MONETARY_DAMAGES': 'sum', 'BEGIN_DAY': 'count'})
#drops the last two years since they have incomplete data
event_years = event_years.drop([2019, 2020], axis=0)
#adds a plot for the total economic damages, regardless of event type
fig.add_trace(
    go.Scatter(x=event_years.index, y=event_years['TOTAL_MONETARY_DAMAGES'], mode="lines+markers",
               name='All EVENTS', visible=False)
)
#loops through the the event types and adds a trace for all of them
# NOTE(review): unlike the 'All EVENTS' trace, these per-category series are
# NOT trimmed of the incomplete 2019/2020 years — confirm this is intended.
# Also note event_years is reused/overwritten inside this loop.
for event_type in unique_events:
    event_years = grouped_df[grouped_df['EVENT_CATEGORY'] == event_type].groupby('BEGIN_YEAR').agg(
        {'TOTAL_MONETARY_DAMAGES': 'sum', 'BEGIN_DAY': 'count'})
    fig.add_trace(
        go.Scatter(x=event_years.index, y=event_years['TOTAL_MONETARY_DAMAGES'], mode="lines+markers",
                   name=event_type, visible=False)
    )
#This creates the dropdown menu and makes only one visible at a time
# Each button's 'visible' list has exactly one True, matching the order the
# 8 traces were added above; active=-1 shows nothing until a choice is made.
fig.update_layout(updatemenus = list([
    dict(active=-1,
         buttons=list([
             dict(label = 'All Events',
                  method = 'update',
                  args = [{'visible': [True, False, False, False, False, False, False, False]},
                          {'title': 'All Events'}]),
             dict(label = 'Tornado',
                  method = 'update',
                  args = [{'visible': [False, True, False, False, False, False, False, False]},
                          {'title': 'Tornado'}]),
             dict(label = 'Winter Weather Event',
                  method = 'update',
                  args = [{'visible': [False, False, True, False, False, False, False, False]},
                          {'title': 'Winter Weather Event'}]),
             dict(label = 'Thunderstorms and Lightning',
                  method = 'update',
                  args = [{'visible': [False, False, False, True, False, False, False, False]},
                          {'title': 'Thunderstorms and Lightning'}]),
             dict(label = 'Flood',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, True, False, False, False]},
                          {'title': 'Flood'}]),
             dict(label = 'Drought, Heat, and Fire',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, True, False, False]},
                          {'title': 'Drought, Heat, and Fire'}]),
             dict(label = 'Tropical Storm or Hurricane',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, False, True, False]},
                          {'title': 'Tropical Storm or Hurricane'}]),
             dict(label = 'Other',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, False, False, True]},
                          {'title': 'Other'}])
         ]),
         )
]))
fig.update_layout(height=500,
                  title_text = "Economic Damages by Year",
                  xaxis_title="Year",
                  yaxis_title="Economic Damages(USD)")
fig.show()
#Plots the 1000 most damaging events for each category on a map of the United States.
#The tropical storms and the drought/fires are not present here since the dataset does not contain longitudes and
#latitudes for those.
fig = go.Figure()
# Hover text shown for each data point on the map.
grouped_df['TEXT'] = 'Event Category: ' + grouped_df['EVENT_TYPE'].astype(str) + "<br>" + \
    'Event Description: ' + grouped_df['EVENT_NARRATIVE'].astype(str).str[0:60] + "..." + "<br>" + \
    'Event Date: ' + grouped_df['BEGIN_MONTH'].astype(str) + "/" + grouped_df['BEGIN_YEAR'].astype(str) + "<br>" + \
    'Economic Damages: ' + grouped_df['TOTAL_MONETARY_DAMAGES'].astype(str)

def top_events(category):
    # The 1000 costliest rows for a single event category.
    subset = grouped_df[grouped_df['EVENT_CATEGORY'] == category]
    return subset.sort_values(by='TOTAL_MONETARY_DAMAGES', ascending=False).head(1000)

trop_storm = top_events('Tropical Storm or Hurricane')
fire = top_events('Drought, Heat, and Fire')
cold_storm = top_events('Winter Weather Event')
tornado = top_events('Tornado')
flood = top_events('Flood')
thunder = top_events('Thunderstorms and Lightning')
other = top_events('Other')
# Divisor that keeps the plotted circles a reasonable size.
scale = 10000000
# Add one Scattergeo layer per event type. Trace order matters: the dropdown
# menu below toggles visibility by position.
geo_layers = [(cold_storm, 'Winter Weather Event'),
              (tornado, 'Tornado'),
              (flood, 'Flood'),
              (thunder, 'Thunderstorms and Lightning'),
              (other, 'Other')]
for layer_df, layer_name in geo_layers:
    fig.add_trace(
        go.Scattergeo(lon=layer_df['BEGIN_LON'],
                      lat=layer_df['BEGIN_LAT'],
                      text=layer_df['TEXT'],
                      # Circle area is proportional to total damages.
                      marker=dict(size=layer_df['TOTAL_MONETARY_DAMAGES'] / scale,
                                  line_color='rgb(40,40,40)',
                                  line_width=0.5,
                                  sizemode='area'),
                      name=layer_name,
                      visible=False))
#creates the dropdown menu
# Five map traces were added above in the order Winter, Tornado, Flood,
# Thunderstorms, Other; each button shows one of them and the final button
# shows all five. active=-1 leaves the map empty until a choice is made.
fig.update_layout(updatemenus = list([
    dict(active=-1,
         buttons=list([
             dict(label = 'Winter Weather Event',
                  method = 'update',
                  args = [{'visible': [True, False, False, False, False]},
                          ]),
             dict(label = 'Tornado',
                  method = 'update',
                  args = [{'visible': [False, True, False, False, False]},
                          ]),
             dict(label = 'Flood',
                  method = 'update',
                  args = [{'visible': [False, False, True, False, False]},
                          ]),
             dict(label = 'Thunderstorms and Lightning',
                  method = 'update',
                  args = [{'visible': [False, False, False, True, False]},
                          ]),
             dict(label = 'Other',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, True]},
                          ]),
             dict(label = 'All Events',
                  method = 'update',
                  args = [{'visible': [True, True, True, True, True]},
                          ]),
         ]),
         )
]))
# Restrict the projection to the continental US.
fig.update_layout(title_text="Location and Economic Damage of Weather Events",
                  geo = dict(scope = 'usa', landcolor = 'rgb(217, 217, 217)'))
fig.show()
###Graphing by State
fig = go.Figure()
# Collapse each per-category frame to state-level damage totals and keep the
# 10 hardest-hit states; also build an all-events state ranking.
def top_states(df):
    totals = df.groupby('STATE').sum()
    return totals.sort_values(by='TOTAL_MONETARY_DAMAGES', ascending=False).head(10)

trop_storm = top_states(trop_storm)
fire = top_states(fire)
cold_storm = top_states(cold_storm)
tornado = top_states(tornado)
flood = top_states(flood)
thunder = top_states(thunder)
other = top_states(other)
all_df = top_states(grouped_df)
# One bar trace per event type; trace order matters for the dropdown below.
bar_specs = [(all_df, 'All', 'red'),
             (trop_storm, 'Tropical Storm or Hurricane', 'orange'),
             (fire, 'Drought, Heat, or Fire', 'yellow'),
             (cold_storm, 'Winter Weather Event', 'green'),
             (tornado, 'Tornado', 'blue'),
             (flood, 'Flood', 'purple'),
             (thunder, 'Thunderstorms and Lightning', 'olive'),
             (other, 'Other', 'dimgrey')]
for bar_df, bar_name, bar_color in bar_specs:
    fig.add_trace(
        go.Bar(x=bar_df.index,
               y=bar_df['TOTAL_MONETARY_DAMAGES'],
               name=bar_name,
               visible=False,
               marker_color=bar_color))
#adds the dropdown menu
# Eight bar traces were added above (All + 7 categories); each button toggles
# exactly one of them, in the same order. active=-1 starts with nothing shown.
fig.update_layout(updatemenus = list([
    dict(active=-1,
         buttons=list([
             dict(label = 'All Events',
                  method = 'update',
                  args = [{'visible': [True, False, False, False, False, False, False, False]},
                          ]),
             dict(label = 'Tropical Storm or Hurricane',
                  method = 'update',
                  args = [{'visible': [False, True, False, False, False, False, False, False]},
                          ]),
             dict(label = 'Drought, Heat, and Fire',
                  method = 'update',
                  args = [{'visible': [False, False, True, False, False, False, False, False]},
                          ]),
             dict(label = 'Winter Weather Event',
                  method = 'update',
                  args = [{'visible': [False, False, False, True, False, False, False, False]},
                          ]),
             dict(label = 'Tornado',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, True, False, False, False]},
                          ]),
             dict(label = 'Flood',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, True, False, False]},
                          ]),
             dict(label = 'Thunderstorms and Lightning',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, False, True, False]},
                          ]),
             dict(label = 'Other',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, False, False, False, True]},
                          ])
         ]),
         )
]))
fig.update_layout(title="10 States With the Most Economic Damages",
                  yaxis_title='Economic Damages (USD)', xaxis_title='State')
fig.show()
# Merge the atmospheric CO2 data with the yearly weather-event damages.
yearly_df = grouped_df.groupby(['BEGIN_YEAR', 'STATE', 'EVENT_CATEGORY'],
                               as_index=False).agg({'TOTAL_MONETARY_DAMAGES': 'sum'})
damages_and_co2 = yearly_df.merge(co2_by_year, left_on='BEGIN_YEAR', right_on='YEAR')
damages_and_co2 = damages_and_co2[['YEAR', 'STATE', 'EVENT_CATEGORY',
                                   'TOTAL_MONETARY_DAMAGES', 'CO2 (PPM)']]
# Scatter with an OLS trendline: yearly total damages vs. mean CO2 level.
by_year_only = damages_and_co2.groupby('YEAR', as_index=False).agg(
    {'TOTAL_MONETARY_DAMAGES': 'sum', 'CO2 (PPM)': 'mean'})
fig = px.scatter(by_year_only,
                 x='TOTAL_MONETARY_DAMAGES',
                 y='CO2 (PPM)',
                 hover_data=['YEAR'],
                 trendline='ols')
fig.update_layout(title='CO2 levels vs. Economic Damages', xaxis_title='Economic Damages (USD)')
fig.show()
# Per-category yearly damages alongside the mean CO2 level for that year.
year_and_category = damages_and_co2.groupby(['YEAR', 'EVENT_CATEGORY'], as_index=False).agg(
    {'TOTAL_MONETARY_DAMAGES': 'sum', 'CO2 (PPM)': 'mean'})
# Build (category, r) tuples of the Pearson correlation between yearly damages
# and CO2 for each event category. Note: pearsonr returns r (the correlation
# coefficient), not r-squared — the original variable name 'r_square' was a
# misnomer. Requires 'import scipy.stats' (added at the top of the file);
# plain 'import scipy' does not reliably expose the stats submodule.
corr_coefs = []
for category in year_and_category['EVENT_CATEGORY'].unique():
    cat_df = year_and_category[year_and_category['EVENT_CATEGORY'] == category]
    pearson_r, _ = scipy.stats.pearsonr(cat_df['TOTAL_MONETARY_DAMAGES'], cat_df['CO2 (PPM)'])
    corr_coefs.append((category, pearson_r))
# Correlation between all damages (across every category row) and CO2 levels.
overall_r, _ = scipy.stats.pearsonr(year_and_category['TOTAL_MONETARY_DAMAGES'],
                                    year_and_category['CO2 (PPM)'])
corr_coefs.append(('All Events', overall_r))
# Bar chart of the per-category correlation coefficients, sorted ascending.
correlations = pd.DataFrame(data=corr_coefs,
                            columns=['EVENT_TYPE', 'CORRELATION COEFFICIENT']
                            ).sort_values(by='CORRELATION COEFFICIENT')
fig = px.bar(correlations,
             x='EVENT_TYPE',
             y='CORRELATION COEFFICIENT',
             color='CORRELATION COEFFICIENT',
             color_continuous_scale='Inferno')
fig.update_layout(title='CO2 and Economic Damage Correlation Coefficents by Weather Event',
                  yaxis_title="Pearson's Correlation Coeffcient",
                  xaxis_title='Weather Event Category')
fig.show()